# Required Packages
import pandas as pd
import numpy as np
import pandas_datareader.data as pdr
from datetime import datetime, timedelta
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.dates as mdates
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
## WordCloud
from wordcloud import WordCloud
import warnings
warnings.filterwarnings("ignore")
In this article, the Communication Services Sector from Yahoo! Finance is used, and we analyze the current top tech companies' stock prices.
| Symbol | Name | Symbol | Name | Symbol | Name |
|---|---|---|---|---|---|
| GOOG | Alphabet Inc. | CHTR | Charter Communications, Inc. | BIDU | Baidu, Inc. |
| GOOGL | Alphabet Inc. | TMUS | T-Mobile US, Inc. | NTES | NetEase, Inc. |
| FB | Facebook, Inc. | VOD | Vodafone Group Plc | TEF | Telefonica, S.A. |
| T | AT&T Inc. | AMX | America Movil, S.A.B. de C.V. | CHA | China Telecom Corporation Limited |
| DIS | The Walt Disney Company | AMOV | America Movil, S.A.B. de C.V. | SIRI | Sirius XM Holdings Inc. |
| VZ | Verizon Communications Inc. | RELX | RELX PLC | EA | Electronic Arts Inc. |
| CMCSA | Comcast Corporation | BCE | BCE Inc. | CHT | Chunghwa Telecom Co., Ltd. |
| CHL | China Mobile Limited | ORAN | Orange S.A. | | |
| NFLX | Netflix, Inc. | ATVI | Activision Blizzard, Inc. | | |
# Tickers to download, in the order they are fetched and reported.
Symbols_list = [
    'GOOG', 'GOOGL', 'FB', 'T', 'DIS', 'VZ', 'CMCSA', 'NFLX',
    'CHTR', 'TMUS', 'VOD', 'AMX', 'AMOV', 'RELX', 'BCE', 'ORAN',
    'ATVI', 'BIDU', 'NTES', 'TEF', 'CHA', 'SIRI', 'EA', 'CHT',
]
# Ticker -> company name, used to relabel tables and charts.
# Note: 'CHL' has a name here but is not part of Symbols_list.
Symbols_Dic = {
    'GOOG': 'Alphabet Inc.',
    'GOOGL': 'Alphabet Inc.',
    'FB': 'Facebook, Inc.',
    'T': 'AT&T Inc.',
    'DIS': 'The Walt Disney Company',
    'VZ': 'Verizon Communications Inc.',
    'CMCSA': 'Comcast Corporation',
    'CHL': 'China Mobile Limited',
    'NFLX': 'Netflix, Inc.',
    'CHTR': 'Charter Communications, Inc.',
    'TMUS': 'T-Mobile US, Inc.',
    'VOD': 'Vodafone Group Plc',
    'AMX': 'America Movil, S.A.B. de C.V.',
    'AMOV': 'America Movil, S.A.B. de C.V.',
    'RELX': 'RELX PLC',
    'BCE': 'BCE Inc.',
    'ORAN': 'Orange S.A.',
    'ATVI': 'Activision Blizzard, Inc.',
    'BIDU': 'Baidu, Inc.',
    'NTES': 'NetEase, Inc.',
    'TEF': 'Telefonica, S.A.',
    'CHA': 'China Telecom Corporation Limited',
    'SIRI': 'Sirius XM Holdings Inc.',
    'EA': 'Electronic Arts Inc.',
    'CHT': 'Chunghwa Telecom Co., Ltd.',
}
The data is collected from January 1st of five years ago until today.
# Download window: January 1st five calendar years back, up to the current moment.
start = datetime(datetime.today().year - 5, 1, 1)
end = datetime.today()
def Timeline_plot(start, end, width=16):
    """Draw a thin horizontal timeline with a marker at every month start.

    Parameters
    ----------
    start, end : datetime
        Ends of the line drawn on the axis. Markers are placed on each
        month-start date between ``start`` and the first day of ``end``'s month.
    width : int or float
        Figure width; the figure height is fixed at 1.
    """
    fig, ax = plt.subplots(figsize=(width, 1))
    month_starts = pd.date_range(start, datetime(end.year, end.month, 1), freq='MS')
    ax.plot((start, end), (0, 0), 'k', alpha=.5)

    # Tick every second month, labelled like "Jan 2020".
    x_axis = ax.get_xaxis()
    x_axis.set_major_locator(mdates.MonthLocator(interval=2))
    x_axis.set_major_formatter(mdates.DateFormatter("%b %Y"))

    # Only the x axis carries information, so hide y ticks and all spines.
    hidden_artists = ax.get_yticklabels() + ax.get_yticklines() + list(ax.spines.values())
    plt.setp(hidden_artists, visible=False)

    for month in month_starts:
        ax.scatter(month, 0, s=30, facecolor='#e74c3c', edgecolor='k')
    ax.grid(False)
    plt.xticks(rotation=90)

    # Extend the view by one month on each side of the marked range.
    before = month_starts.min().replace(day=1) - timedelta(days=1)
    after = month_starts.max().replace(day=1) + timedelta(days=31)
    ax.set_xlim([datetime(before.year, before.month, 1),
                 datetime(after.year, after.month, 1)])


Timeline_plot(start, end)
Collecting the data from Yahoo! Finance and computing moving averages of the adjusted close over 10-, 20- and 60-day windows.
def Get_Data(Inp):
    """Download one ticker's price history and append its moving averages.

    Parameters
    ----------
    Inp : str
        Ticker symbol handed to the 'yahoo' reader of pandas-datareader.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with a leading 'Symbol' column set to ``Inp`` and
        a 'Moving Ave. <n> days' column holding the rolling mean of
        'Adj Close' for n = 10, 20 and 60.

    Notes
    -----
    The date range is taken from the module-level ``start`` and ``end``.
    """
    history = pdr.DataReader(Inp, 'yahoo', start, end)
    history.insert(0, 'Symbol', Inp)
    for window in (10, 20, 60):
        rolling_mean = history['Adj Close'].rolling(window=window, center=False).mean()
        history[f"Moving Ave. {window} days"] = rolling_mean
    return history
# Download every ticker and stack the per-symbol frames into one long table.
# The bar advances once per completed download, so it reaches maxval exactly.
Progress_Bar = progressbar.ProgressBar(maxval=len(Symbols_list),
                                       widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
Progress_Bar.start()
_frames = []
for Counter, _symbol in enumerate(Symbols_list, start=1):
    _frames.append(Get_Data(_symbol))
    Progress_Bar.update(Counter)
Progress_Bar.finish()
# A single concat avoids re-copying the growing frame on every iteration.
Data = pd.concat(_frames)
del _frames, _symbol
# Every column except the ticker label is stored as float.
Data = Data.astype({col: float for col in Data.columns if col != 'Symbol'})
|#########################################################################|100%
Displaying today's data only:
# Rows stamped with the last date found in Data (one row per symbol).
Today = Data.loc[Data.index == Data.index[-1]].reset_index(drop=True)
_numeric_cols = [col for col in Today.columns if col != 'Symbol']
Today[_numeric_cols] = Today[_numeric_cols].astype(float)
del _numeric_cols
# Show company names instead of tickers.
Today['Symbol'] = Today['Symbol'].replace(Symbols_Dic)
display(
    Today.style.hide_index().set_precision(2)
    .bar(subset=['Volume'], align='mid', color=['Lime'])
    .bar(subset=['Adj Close'], align='mid', color=['Salmon'])
    .set_properties(subset=['Symbol'], **{'background-color': 'Indigo', 'color': 'White'})
)
print('Currently, the stock with the highest volume is %s'
      % Today.loc[Today.Volume == Today.Volume.max(), 'Symbol'].values[0])
| Symbol | High | Low | Open | Close | Volume | Adj Close | Moving Ave. 10 days | Moving Ave. 20 days | Moving Ave. 60 days |
|---|---|---|---|---|---|---|---|---|---|
| Alphabet Inc. | 2277.14 | 2256.09 | 2261.47 | 2267.27 | 1059181.00 | 2267.27 | 2203.53 | 2128.54 | 2052.31 |
| Alphabet Inc. | 2263.47 | 2243.05 | 2250.99 | 2254.43 | 1215150.00 | 2254.43 | 2192.62 | 2117.41 | 2041.47 |
| Facebook, Inc. | 314.42 | 309.32 | 312.21 | 309.76 | 12422165.00 | 309.76 | 305.62 | 295.36 | 275.89 |
| AT&T Inc. | 29.86 | 29.51 | 29.86 | 29.63 | 39570988.00 | 29.63 | 30.06 | 29.82 | 28.96 |
| The Walt Disney Company | 186.44 | 184.60 | 185.59 | 185.49 | 6488604.00 | 185.49 | 187.20 | 188.44 | 185.90 |
| Verizon Communications Inc. | 57.76 | 57.16 | 57.37 | 57.62 | 15220334.00 | 57.62 | 57.85 | 57.07 | 55.87 |
| Comcast Corporation | 53.99 | 53.26 | 53.49 | 53.88 | 14639268.00 | 53.88 | 54.22 | 55.08 | 53.15 |
| Netflix, Inc. | 559.75 | 550.37 | 557.00 | 553.73 | 2653331.00 | 553.73 | 542.31 | 529.62 | 536.32 |
| Charter Communications, Inc. | 623.55 | 614.50 | 615.22 | 621.29 | 719170.00 | 621.29 | 615.88 | 627.01 | 622.51 |
| T-Mobile US, Inc. | 131.20 | 129.67 | 130.30 | 130.79 | 3028811.00 | 130.79 | 129.11 | 126.90 | 125.80 |
| Vodafone Group Plc | 18.68 | 18.47 | 18.50 | 18.64 | 2179223.00 | 18.64 | 18.73 | 18.76 | 18.27 |
| America Movil, S.A.B. de C.V. | 14.06 | 13.85 | 13.89 | 14.04 | 1223695.00 | 14.04 | 13.91 | 13.81 | 13.62 |
| America Movil, S.A.B. de C.V. | 14.30 | 14.03 | 14.19 | 14.23 | 1080.00 | 14.23 | 13.91 | 13.75 | 13.74 |
| RELX PLC | 26.49 | 26.24 | 26.29 | 26.47 | 338382.00 | 26.47 | 25.94 | 25.39 | 25.00 |
| BCE Inc. | 46.46 | 45.97 | 46.00 | 46.37 | 1056994.00 | 46.37 | 45.87 | 45.72 | 44.04 |
| Orange S.A. | 12.31 | 12.14 | 12.14 | 12.29 | 531858.00 | 12.29 | 12.43 | 12.45 | 12.07 |
| Activision Blizzard, Inc. | 98.25 | 96.51 | 97.00 | 97.54 | 5107020.00 | 97.54 | 95.92 | 93.80 | 94.98 |
| Baidu, Inc. | 217.37 | 207.69 | 209.60 | 216.56 | 8559513.00 | 216.56 | 220.35 | 232.72 | 261.08 |
| NetEase, Inc. | 107.21 | 103.68 | 103.82 | 106.21 | 1276616.00 | 106.21 | 104.88 | 105.36 | 113.11 |
| Telefonica, S.A. | 4.56 | 4.48 | 4.51 | 4.53 | 2196945.00 | 4.53 | 4.56 | 4.69 | 4.61 |
| Sirius XM Holdings Inc. | 6.24 | 6.16 | 6.24 | 6.20 | 15691466.00 | 6.20 | 6.25 | 6.23 | 6.12 |
| Electronic Arts Inc. | 143.88 | 141.51 | 142.59 | 142.11 | 1861271.00 | 142.11 | 139.82 | 135.45 | 138.63 |
| Chunghwa Telecom Co., Ltd. | 39.62 | 39.23 | 39.29 | 39.50 | 84205.00 | 39.50 | 39.36 | 39.16 | 39.23 |
Currently, the stock with the highest volume is AT&T Inc.
Consider AT&T Inc. as an example. We have,
def Header(Text, L=100, C='Blue', T='White'):
    """Print ``Text`` on a coloured background followed by a rule of '=' signs.

    Parameters
    ----------
    Text : str
        Caption to print.
    L : int
        Target total width; the rule is ``L - len(Text) - 1`` characters long.
    C : str
        Colour name used for the caption background and for the rule.
    T : str
        Colour name used for the caption text.

    Raises
    ------
    KeyError
        If ``C`` or ``T`` is not one of the colour names in the lookup tables
        ('White' is available as a text colour only).
    """
    background_names = ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan')
    foreground_names = background_names + ('White',)
    BACK = {name: getattr(Back, name.upper()) for name in background_names}
    FORE = {name: getattr(Fore, name.upper()) for name in foreground_names}
    caption = BACK[C] + FORE[T] + Style.NORMAL + Text + Style.RESET_ALL
    rule = FORE[C] + Style.NORMAL + '=' * (L - len(Text) - 1) + Style.RESET_ALL
    print(caption + ' ' + rule)
def Line(L=100, C='Blue'):
    """Print a rule made of ``L`` '=' characters in colour ``C``.

    Raises
    ------
    KeyError
        If ``C`` is not one of the supported colour names.
    """
    colour_names = ('Black', 'Red', 'Green', 'Yellow', 'Blue', 'Magenta', 'Cyan', 'White')
    FORE = {name: getattr(Fore, name.upper()) for name in colour_names}
    rule = '=' * L
    print(FORE[C] + Style.NORMAL + rule + Style.RESET_ALL)
def Disp_Data(Inp, df = Data):
    """Return the rows of ``df`` whose 'Symbol' equals ``Inp``, without that column.

    ``df`` defaults to the ``Data`` frame that existed when this function was defined.
    """
    belongs_to_symbol = df.Symbol == Inp
    return df.loc[belongs_to_symbol].drop(columns=['Symbol'])
# Summary statistics, then the first and the last seven rows, for ticker 'T'.
Temp = Disp_Data('T')
display(Temp.describe().round(2))

# First seven rows; columns containing NaN are dropped before display.
Tag = Temp.head(7).index[[0, -1]].astype(str).tolist()
Header('From {} To {}'.format(*Tag))
display(Temp.head(7).dropna(axis=1).round(2))

# Last seven rows, all columns.
Tag = Temp.tail(7).index[[0, -1]].astype(str).tolist()
Header('From {} To {}'.format(*Tag))
display(Temp.tail(7).round(2))

del Temp, Tag
Line()
| High | Low | Open | Close | Volume | Adj Close | Moving Ave. 10 days | Moving Ave. 20 days | Moving Ave. 60 days | |
|---|---|---|---|---|---|---|---|---|---|
| count | 1328.00 | 1328.00 | 1328.00 | 1328.00 | 1.328000e+03 | 1328.00 | 1319.00 | 1309.00 | 1269.00 |
| mean | 35.20 | 34.65 | 34.93 | 34.93 | 3.170162e+07 | 29.33 | 29.34 | 29.36 | 29.40 |
| std | 4.32 | 4.35 | 4.33 | 4.35 | 1.606388e+07 | 2.50 | 2.45 | 2.40 | 2.24 |
| min | 26.88 | 26.08 | 26.50 | 26.50 | 8.697200e+06 | 23.28 | 24.37 | 24.97 | 25.68 |
| 25% | 31.01 | 30.40 | 30.72 | 30.70 | 2.160062e+07 | 27.50 | 27.51 | 27.55 | 27.67 |
| 50% | 35.49 | 34.91 | 35.20 | 35.20 | 2.804705e+07 | 28.88 | 28.89 | 28.88 | 29.00 |
| 75% | 38.73 | 38.22 | 38.50 | 38.50 | 3.755808e+07 | 30.79 | 30.67 | 30.63 | 30.69 |
| max | 43.89 | 43.33 | 43.50 | 43.47 | 1.950827e+08 | 35.83 | 35.54 | 35.28 | 34.96 |
From 2016-01-04 To 2016-01-12 ======================================================================
| High | Low | Open | Close | Volume | Adj Close | |
|---|---|---|---|---|---|---|
| Date | ||||||
| 2016-01-04 | 34.35 | 34.00 | 34.08 | 34.35 | 31779500.0 | 25.00 |
| 2016-01-05 | 34.69 | 34.12 | 34.35 | 34.59 | 30707300.0 | 25.18 |
| 2016-01-06 | 34.21 | 33.72 | 33.82 | 34.06 | 26911800.0 | 25.14 |
| 2016-01-07 | 34.10 | 33.51 | 33.62 | 33.51 | 35111400.0 | 24.74 |
| 2016-01-08 | 33.96 | 33.41 | 33.77 | 33.54 | 28090800.0 | 24.76 |
| 2016-01-11 | 34.10 | 33.53 | 33.76 | 33.95 | 26438800.0 | 25.06 |
| 2016-01-12 | 34.19 | 33.66 | 34.10 | 33.90 | 24124100.0 | 25.02 |
From 2021-04-05 To 2021-04-13 ======================================================================
| High | Low | Open | Close | Volume | Adj Close | Moving Ave. 10 days | Moving Ave. 20 days | Moving Ave. 60 days | |
|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||
| 2021-04-05 | 30.84 | 30.60 | 30.62 | 30.71 | 38517300.0 | 30.19 | 29.80 | 29.59 | 28.80 |
| 2021-04-06 | 30.98 | 30.69 | 30.71 | 30.98 | 33666400.0 | 30.46 | 29.90 | 29.64 | 28.83 |
| 2021-04-07 | 31.15 | 30.80 | 31.01 | 30.93 | 40650500.0 | 30.41 | 29.99 | 29.70 | 28.86 |
| 2021-04-08 | 30.34 | 29.93 | 30.30 | 30.00 | 49877400.0 | 30.00 | 30.04 | 29.73 | 28.89 |
| 2021-04-09 | 30.19 | 29.91 | 30.05 | 30.04 | 33291800.0 | 30.04 | 30.09 | 29.78 | 28.92 |
| 2021-04-12 | 30.16 | 29.90 | 30.01 | 29.96 | 24830600.0 | 29.96 | 30.10 | 29.81 | 28.95 |
| 2021-04-13 | 29.86 | 29.51 | 29.86 | 29.63 | 39570988.0 | 29.63 | 30.06 | 29.82 | 28.96 |
====================================================================================================
# Interactive chart of the adjusted close of every symbol, labelled by company name.
Temp = Data[['Symbol', 'Adj Close']].reset_index(drop=False)
Temp['Symbol'] = Temp['Symbol'].replace(Symbols_Dic)
fig = px.line(Temp, x='Date', y='Adj Close', color='Symbol')
# Range slider plus quick-zoom buttons on the date axis.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeslider=dict(bgcolor='WhiteSmoke'),
    rangeselector=dict(bgcolor='WhiteSmoke', buttons=[
        dict(count=1, label='One Day', step='day', stepmode='todate'),
        dict(count=1, label='One Month', step='month', stepmode='todate'),
        dict(count=6, label='Six Months', step='month', stepmode='todate'),
        dict(count=1, label='This Year', step='year', stepmode='todate'),
        dict(step='all'),
    ]),
)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The bold tag is closed with </b>; the original left it open with a second <b>.
fig.update_layout(title={'text': '<b>' + 'Stock Prices Fluctuations' + '</b>',
                         'x': .5, 'y': .98, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
del Temp
The following table shows the average value of each price and volume column for every symbol.
def List_Search(List, Key):
    """Return the items of ``List`` that contain ``Key``, in their original order."""
    matches = []
    for item in List:
        if Key in item:
            matches.append(item)
    return matches
def List_Diff(Inp_A, Inp_B):
    """Return the distinct items of ``Inp_A`` that do not occur in ``Inp_B``.

    Parameters
    ----------
    Inp_A : iterable of hashable
        Items to keep, minus exclusions.
    Inp_B : iterable of hashable
        Items to exclude.

    Returns
    -------
    list
        Each surviving item once, in the order of its first appearance in
        ``Inp_A``. (A plain set difference would return the same items in an
        arbitrary order that can change from run to run.)
    """
    excluded = set(Inp_B)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return [item for item in dict.fromkeys(Inp_A) if item not in excluded]
# Columns to average: everything except 'Symbol' and the moving-average columns.
Columns = List_Diff(Data.columns.tolist(), List_Search(Data.columns.tolist(), 'Moving Ave'))
Columns = List_Diff(Columns, ['Symbol'])
# One row per symbol (in Symbols_list order) with the mean of each selected
# column, named 'Ave. <column>'. Computing the means with groupby keeps the
# result columns numeric; filling ''-initialised columns row by row left them
# with object dtype.
Ave_df = (
    Data.groupby('Symbol')[Columns].mean()
    .reindex(Symbols_list)
    .add_prefix('Ave. ')
    .rename_axis('Symbol')
    .reset_index()
)
# Styled table of the averages: in-cell bars for volume and adjusted close.
display(
    Ave_df.style.hide_index().set_precision(2)
    .bar(subset=['Ave. Volume'], align='mid', color=['Lime'])
    .bar(subset=['Ave. Adj Close'], align='mid', color=['Salmon'])
    .set_properties(subset=['Symbol'], **{'background-color': 'DimGray', 'color': 'White'})
)
# Long-form copy of the averages: one row per (symbol, summary column).
# The value columns are listed in frame order so the legend order is stable.
Temp = pd.melt(Ave_df, id_vars=['Symbol'],
               value_vars=[col for col in Ave_df.columns if col != 'Symbol'],
               var_name='Summary', value_name='Value')
fig = make_subplots(rows=2, cols=1, vertical_spacing=0.1, shared_yaxes=True,
                    subplot_titles=('Ave. Volume', 'Summary'))
# Top: average volume per symbol.
fig1 = px.bar(Ave_df.round(0), x='Symbol', y='Ave. Volume', orientation='v', text='Ave. Volume',
              hover_data=Ave_df.columns)
for _trace in fig1['data']:
    fig.add_trace(_trace, row=1, col=1)
fig.update_traces(marker_line_color='Black', marker_line_width=1, opacity=1, row=1, col=1)
# Bottom: every other average, coloured by summary column.
fig2 = px.bar(Temp.loc[Temp.Summary != 'Ave. Volume'].round(0), x='Symbol', y='Value',
              color='Summary', orientation='v', hover_data=Temp.columns)
for _trace in fig2['data']:
    fig.add_trace(_trace, row=2, col=1)
fig.update_traces(marker_line_color='Black', marker_line_width=1, opacity=1, showlegend=True, row=2, col=1)
# Shared axis and layout styling.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 zeroline=False, zerolinewidth=1, zerolinecolor='Black',
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 zeroline=False, zerolinewidth=1, zerolinecolor='Black', title_text='Value',
                 showgrid=False, gridwidth=1, gridcolor='Lightgray')
fig.update_layout(legend_orientation='v', plot_bgcolor='white', height=800)
fig.update_layout(legend=dict(font=dict(color="Black"), bordercolor="Lightgray", borderwidth=1))
# The bold tag is closed with </b>; the original left it open with a second <b>.
fig.update_layout(title={'text': '<b>' + 'Average Values' + '</b>',
                         'x': 0.5, 'y': .95, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
|#########################################################################|100%
| Symbol | Ave. Volume | Ave. Adj Close | Ave. Low | Ave. Open | Ave. High | Ave. Close |
|---|---|---|---|---|---|---|
| GOOG | 1666631.54 | 1138.07 | 1126.59 | 1137.15 | 1148.65 | 1138.07 |
| GOOGL | 1826994.01 | 1146.41 | 1134.71 | 1145.83 | 1157.21 | 1146.41 |
| FB | 21650941.77 | 177.60 | 175.44 | 177.53 | 179.60 | 177.60 |
| T | 31701622.58 | 29.33 | 34.65 | 34.93 | 35.20 | 34.93 |
| DIS | 9776028.54 | 115.15 | 116.47 | 117.61 | 118.64 | 117.58 |
| VZ | 15442835.42 | 47.93 | 53.25 | 53.67 | 54.07 | 53.68 |
| CMCSA | 20859867.14 | 37.03 | 38.55 | 38.95 | 39.34 | 38.96 |
| NFLX | 8900617.64 | 286.17 | 281.46 | 286.10 | 290.54 | 286.17 |
| CHTR | 1593585.37 | 380.32 | 375.76 | 380.08 | 384.71 | 380.32 |
| TMUS | 4227093.76 | 72.86 | 72.03 | 72.85 | 73.64 | 72.86 |
| VOD | 4328839.02 | 19.66 | 23.08 | 23.23 | 23.38 | 23.23 |
| AMX | 3080793.90 | 13.92 | 14.58 | 14.76 | 14.94 | 14.76 |
| AMOV | 7408.64 | 13.82 | 14.51 | 14.66 | 14.83 | 14.67 |
| RELX | 385190.88 | 20.49 | 21.49 | 21.60 | 21.72 | 21.61 |
| BCE | 948449.47 | 38.91 | 44.04 | 44.32 | 44.59 | 44.32 |
| ORAN | 476539.20 | 13.37 | 15.12 | 15.21 | 15.29 | 15.21 |
| ATVI | 7667649.26 | 58.33 | 58.22 | 59.05 | 59.80 | 59.05 |
| BIDU | 3945221.70 | 177.08 | 174.43 | 177.19 | 179.72 | 177.08 |
| NTES | 4857419.29 | 57.06 | 58.15 | 59.08 | 60.01 | 59.09 |
| TEF | 1645822.40 | 7.06 | 8.20 | 8.25 | 8.31 | 8.25 |
| CHA | 62818.44 | 40.96 | 44.82 | 45.10 | 45.33 | 45.07 |
| SIRI | 26985553.97 | 5.50 | 5.55 | 5.61 | 5.68 | 5.61 |
| EA | 3545828.22 | 104.36 | 103.24 | 104.63 | 105.88 | 104.61 |
| CHT | 149289.16 | 32.39 | 35.43 | 35.57 | 35.71 | 35.57 |
def TopN_volumes(N, df = Ave_df):
    """Return the first-column values of the ``N`` rows with the largest 'Ave. Volume'.

    ``df`` defaults to the ``Ave_df`` frame that existed when this function
    was defined; the result is a plain list ordered from highest volume down.
    """
    by_volume = df.sort_values(by='Ave. Volume', ascending=False)
    top_rows = by_volume.iloc[:N, 0]
    return top_rows.tolist()
# Consider the adjusted close together with its moving averages, sorted by name.
Columns = sorted(List_Search(Data.columns.tolist(), 'Moving Ave.') + ['Adj Close'])
# The top N = 4 companies with the highest volume on average.
# (The original repeated this setup twice; it is computed once here.)
N = 4
mylist = TopN_volumes(N, df=Ave_df)
for symb in mylist:
    # Long-form frame: one row per (date, plotted column).
    Temp = Disp_Data(symb)[Columns].reset_index(drop=False)
    Temp = pd.melt(Temp, id_vars=['Date'], value_vars=List_Diff(Temp.columns, ['Date']),
                   var_name='Variable', value_name='Value')
    fig = px.line(Temp, x='Date', y='Value', color='Variable')
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeslider=dict(bgcolor='WhiteSmoke'),
        rangeselector=dict(bgcolor='WhiteSmoke', buttons=[
            dict(count=1, label='One Day', step='day', stepmode='todate'),
            dict(count=1, label='One Month', step='month', stepmode='todate'),
            dict(count=6, label='Six Months', step='month', stepmode='todate'),
            dict(count=1, label='This Year', step='year', stepmode='todate'),
            dict(step='all'),
        ]),
    )
    fig.update_layout(plot_bgcolor='white')
    fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                     showgrid=True, gridwidth=1, gridcolor='Lightgray')
    fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                     showgrid=True, gridwidth=1, gridcolor='Lightgray')
    # The bold tag is closed with </b>; the original left it open with a second <b>.
    fig.update_layout(title={'text': '<b>' + Symbols_Dic[symb] + '</b>',
                             'x': .5, 'y': .97, 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
del Temp
del symb
Daily return can be calculated using the percentage change of the adjusted closing price.
# Daily return of each top-volume symbol = percentage change of its adjusted
# close. The per-symbol frames are built first and concatenated once.
Daily_Return = pd.concat([
    Disp_Data(stock)['Adj Close'].pct_change().to_frame('Daily Return')
    .reset_index(drop=False).assign(Symbols=stock)
    for stock in mylist
]).sort_values('Date')
fig = px.line(Daily_Return, x='Date', y='Daily Return', color='Symbols')
fig.update_xaxes(
    rangeslider_visible=True,
    rangeslider=dict(bgcolor='WhiteSmoke'),
    rangeselector=dict(bgcolor='WhiteSmoke', buttons=[
        dict(count=1, label='One Day', step='day', stepmode='todate'),
        dict(count=1, label='One Month', step='month', stepmode='todate'),
        dict(count=6, label='Six Months', step='month', stepmode='todate'),
        dict(count=1, label='This Year', step='year', stepmode='todate'),
        dict(step='all'),
    ]),
)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The bold tag is closed with </b>; the original left it open with a second <b>.
fig.update_layout(title={'text': '<b>' + 'Daily Returns' + '</b>',
                         'x': .5, 'y': .96, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
del Daily_Return
First, we need to create a new data frame by reading the Adj Close column of every stock under study. We have,
# Adjusted close of every symbol side by side (one column per ticker).
All_data = pdr.DataReader(Symbols_list, 'yahoo', start, end)['Adj Close']
# fill_method=None stops pct_change from forward-filling missing prices first;
# with the default padding a ticker that has no recent quotes gets a run of
# spurious 0.0 returns, which distorts its mean and standard deviation.
All_returns = All_data.pct_change(fill_method=None)

# First seven rows; tickers with a missing value in that window are dropped.
Tag = All_data.head(7).index[[0, -1]].astype(str).tolist()
Header('From %s To %s' % (Tag[0], Tag[1]), C='Black')
display(All_data.head(7).dropna(axis=1).round(4))
# Last seven rows, same treatment.
Tag = All_data.tail(7).index[[0, -1]].astype(str).tolist()
Header('From %s To %s' % (Tag[0], Tag[1]), C='Black')
display(All_data.tail(7).dropna(axis=1).round(4))
Line(C='Black')
From 2016-01-04 To 2016-01-12 ======================================================================
| Symbols | GOOG | GOOGL | FB | T | DIS | VZ | CMCSA | NFLX | CHTR | TMUS | ... | BCE | ORAN | ATVI | BIDU | NTES | TEF | CHA | SIRI | EA | CHT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2016-01-04 | 741.84 | 759.44 | 102.22 | 25.0047 | 97.1558 | 35.9547 | 25.1296 | 109.96 | 177.53 | 38.95 | ... | 29.9915 | 13.0994 | 36.3658 | 184.03 | 31.7791 | 8.0895 | 37.9867 | 3.8074 | 65.9303 | 23.9200 |
| 2016-01-05 | 742.58 | 761.53 | 102.73 | 25.1794 | 95.1934 | 36.4485 | 25.1342 | 107.66 | 175.17 | 40.22 | ... | 30.2099 | 13.0285 | 35.9018 | 187.37 | 32.1004 | 8.0296 | 38.2234 | 3.7689 | 64.7733 | 23.9685 |
| 2016-01-06 | 743.62 | 759.33 | 102.97 | 25.1425 | 94.6840 | 36.1192 | 24.9400 | 117.68 | 174.31 | 40.05 | ... | 30.3113 | 13.0994 | 35.5635 | 185.61 | 32.0653 | 7.9398 | 37.5471 | 3.7496 | 63.5165 | 23.9362 |
| 2016-01-07 | 726.39 | 741.00 | 97.92 | 24.7365 | 93.8726 | 35.9209 | 24.6645 | 114.56 | 173.63 | 40.51 | ... | 29.7965 | 13.0915 | 35.0608 | 174.37 | 30.5329 | 7.9024 | 36.3550 | 3.7014 | 62.2996 | 23.8958 |
| 2016-01-08 | 714.47 | 730.91 | 97.33 | 24.7587 | 93.6367 | 35.5717 | 24.6916 | 111.39 | 174.89 | 39.88 | ... | 29.8979 | 12.9812 | 34.5195 | 171.34 | 30.1802 | 7.7452 | 36.6255 | 3.6725 | 62.9679 | 24.0251 |
| 2016-01-11 | 716.03 | 733.07 | 97.51 | 25.0613 | 94.2688 | 35.7780 | 24.7141 | 114.97 | 175.63 | 39.68 | ... | 29.9291 | 13.0127 | 35.0028 | 170.37 | 29.3180 | 7.7003 | 36.7354 | 3.6821 | 64.0451 | 24.5504 |
| 2016-01-12 | 726.07 | 745.34 | 99.37 | 25.0244 | 95.7217 | 35.6511 | 24.7819 | 116.58 | 175.32 | 40.06 | ... | 30.4517 | 13.5014 | 35.8825 | 172.91 | 29.7002 | 7.7003 | 36.5917 | 3.6532 | 65.4515 | 24.6877 |
7 rows × 24 columns
From 2021-04-05 To 2021-04-13 ======================================================================
| Symbols | GOOG | GOOGL | FB | T | DIS | VZ | CMCSA | NFLX | CHTR | TMUS | ... | RELX | BCE | ORAN | ATVI | BIDU | NTES | TEF | SIRI | EA | CHT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2021-04-05 | 2225.5500 | 2218.9600 | 308.91 | 30.1937 | 188.50 | 58.2434 | 54.550 | 540.67 | 605.50 | 131.40 | ... | 26.03 | 45.86 | 12.57 | 97.87 | 222.29 | 105.54 | 4.60 | 6.30 | 141.54 | 39.51 |
| 2021-04-06 | 2224.7500 | 2209.2600 | 306.26 | 30.4592 | 189.73 | 58.4808 | 54.840 | 544.53 | 613.39 | 131.43 | ... | 25.84 | 45.89 | 12.42 | 97.32 | 226.55 | 107.27 | 4.55 | 6.34 | 141.28 | 39.49 |
| 2021-04-07 | 2249.6799 | 2239.0300 | 313.09 | 30.4100 | 187.56 | 58.3720 | 54.600 | 546.99 | 611.63 | 130.02 | ... | 26.03 | 45.93 | 12.49 | 96.84 | 222.00 | 103.39 | 4.59 | 6.33 | 142.01 | 39.61 |
| 2021-04-08 | 2265.4399 | 2250.4299 | 313.02 | 30.0000 | 187.32 | 57.6000 | 54.330 | 554.58 | 615.16 | 130.11 | ... | 26.25 | 45.97 | 12.56 | 96.46 | 226.77 | 104.93 | 4.60 | 6.31 | 141.55 | 39.29 |
| 2021-04-09 | 2285.8799 | 2270.6699 | 312.46 | 30.0400 | 187.89 | 57.4900 | 53.570 | 555.31 | 614.24 | 129.03 | ... | 26.43 | 46.22 | 12.50 | 95.78 | 219.67 | 103.94 | 4.57 | 6.35 | 140.46 | 39.40 |
| 2021-04-12 | 2254.7900 | 2244.6201 | 311.54 | 29.9600 | 186.49 | 57.5400 | 53.320 | 552.78 | 619.30 | 130.76 | ... | 26.49 | 46.12 | 12.37 | 96.14 | 214.14 | 104.75 | 4.58 | 6.23 | 141.45 | 39.36 |
| 2021-04-13 | 2267.2700 | 2254.4299 | 309.76 | 29.6300 | 185.49 | 57.6200 | 53.875 | 553.73 | 621.29 | 130.79 | ... | 26.47 | 46.37 | 12.29 | 97.54 | 216.56 | 106.21 | 4.53 | 6.20 | 142.11 | 39.50 |
7 rows × 23 columns
====================================================================================================
The returns can be analyzed using the percentage change of the Adj Close column.
# Table of the last seven daily returns, then a chart of all daily returns.
Tag = All_data.tail(7).index[[0, -1]].astype(str).tolist()
Header('Returns (From %s To %s)' % (Tag[0], Tag[1]), C='Cyan', T='Black')
display(All_returns.tail(7).round(4))
# Long-form frame: one row per (date, symbol). Columns are listed in frame
# order so the legend order does not depend on set iteration order.
Temp = pd.melt(All_returns.reset_index(drop=False), id_vars=['Date'],
               value_vars=list(All_returns.columns),
               var_name='Symbols', value_name='Daily Return')
Temp['Symbols'] = Temp['Symbols'].replace(Symbols_Dic)
fig = px.line(Temp, x='Date', y='Daily Return', color='Symbols')
fig.update_xaxes(
    rangeslider_visible=True,
    rangeslider=dict(bgcolor='WhiteSmoke'),
    rangeselector=dict(bgcolor='WhiteSmoke', buttons=[
        dict(count=1, label='One Day', step='day', stepmode='todate'),
        dict(count=7, label='One Week', step='day', stepmode='todate'),
        dict(count=1, label='One Month', step='month', stepmode='todate'),
        dict(count=6, label='Six Months', step='month', stepmode='todate'),
        dict(count=1, label='This Year', step='year', stepmode='todate'),
        dict(step='all'),
    ]),
)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The bold tag is closed with </b>; the original left it open with a second <b>.
fig.update_layout(title={'text': '<b>' + 'Daily Returns' + '</b>',
                         'x': .5, 'y': .98, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
Returns (From 2021-04-05 To 2021-04-13) ============================================================
| Symbols | GOOG | GOOGL | FB | T | DIS | VZ | CMCSA | NFLX | CHTR | TMUS | ... | BCE | ORAN | ATVI | BIDU | NTES | TEF | CHA | SIRI | EA | CHT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2021-04-05 | 0.0411 | 0.0419 | 0.0343 | 0.0079 | -0.0025 | 0.0098 | 0.0009 | 0.0023 | -0.0051 | 0.0294 | ... | 0.0057 | 0.0170 | 0.0244 | 0.0118 | -0.0099 | 0.0155 | 0.0 | 0.0048 | 0.0259 | 0.0097 |
| 2021-04-06 | -0.0004 | -0.0044 | -0.0086 | 0.0088 | 0.0065 | 0.0041 | 0.0053 | 0.0071 | 0.0130 | 0.0002 | ... | 0.0007 | -0.0119 | -0.0056 | 0.0192 | 0.0164 | -0.0109 | 0.0 | 0.0063 | -0.0018 | -0.0005 |
| 2021-04-07 | 0.0112 | 0.0135 | 0.0223 | -0.0016 | -0.0114 | -0.0019 | -0.0044 | 0.0045 | -0.0029 | -0.0107 | ... | 0.0009 | 0.0056 | -0.0049 | -0.0201 | -0.0362 | 0.0088 | 0.0 | -0.0016 | 0.0052 | 0.0030 |
| 2021-04-08 | 0.0070 | 0.0051 | -0.0002 | -0.0135 | -0.0013 | -0.0132 | -0.0049 | 0.0139 | 0.0058 | 0.0007 | ... | 0.0009 | 0.0056 | -0.0039 | 0.0215 | 0.0149 | 0.0022 | 0.0 | -0.0032 | -0.0032 | -0.0081 |
| 2021-04-09 | 0.0090 | 0.0090 | -0.0018 | 0.0013 | 0.0030 | -0.0019 | -0.0140 | 0.0013 | -0.0015 | -0.0083 | ... | 0.0054 | -0.0048 | -0.0070 | -0.0313 | -0.0094 | -0.0065 | 0.0 | 0.0063 | -0.0077 | 0.0028 |
| 2021-04-12 | -0.0136 | -0.0115 | -0.0029 | -0.0027 | -0.0075 | 0.0009 | -0.0047 | -0.0046 | 0.0082 | 0.0134 | ... | -0.0022 | -0.0104 | 0.0038 | -0.0252 | 0.0078 | 0.0022 | 0.0 | -0.0189 | 0.0070 | -0.0010 |
| 2021-04-13 | 0.0055 | 0.0044 | -0.0057 | -0.0110 | -0.0054 | 0.0014 | 0.0104 | 0.0017 | 0.0032 | 0.0002 | ... | 0.0054 | -0.0065 | 0.0146 | 0.0113 | 0.0139 | -0.0109 | 0.0 | -0.0048 | 0.0047 | 0.0036 |
7 rows × 24 columns
The following graphs show the correlation between different stocks.
# Scatter + regression of daily returns for two pairs of stocks.
# x, y and data are passed by keyword and the figure size as ``height``:
# the positional form and the ``size`` argument are deprecated/removed in
# newer seaborn releases.
_ = sns.jointplot(x='GOOG', y='GOOGL', data=All_returns, kind='reg', space=0, height=6, ratio=4)
_ = sns.jointplot(x='GOOGL', y='BIDU', data=All_returns, kind='reg', space=0, height=6, ratio=4)
Now, we can use the pairplot tool to visualize all.
# Daily returns of the eight highest-volume symbols, keeping only complete rows.
Temp = All_returns[TopN_volumes(8, df=Ave_df)].dropna()
# Pairwise scatter plots with kernel-density estimates on the diagonal.
_ = sns.pairplot(Temp, diag_kind='kde')
Nonetheless, the correlation matrix and plot are always convenient to see numerical values for correlations.
# Correlation matrix of the selected returns, shown as a colour-graded table.
Cor_matrix = Temp.corr()
display(
    Cor_matrix.style
    .set_caption('Correlation Matrix')
    .background_gradient(cmap=sns.diverging_palette(5, 250, as_cmap=True))
)
def Correlation_Plot (Df,Fig_Size):
    """Draw an annotated heatmap of the correlation matrix of ``Df``.

    Parameters
    ----------
    Df : pandas.DataFrame
        Frame whose column-to-column correlations are plotted.
    Fig_Size : int or float
        Width and height of the square figure.

    Only the diagonal and the cells below it are drawn; the colour scale is
    fixed to the range 0..1.
    """
    Correlation_Matrix = Df.corr()
    # Mask the cells strictly above the diagonal (k=1 leaves the diagonal visible).
    mask = np.zeros_like(Correlation_Matrix)
    mask[np.triu_indices_from(mask, k=1)] = True
    Fig, ax = plt.subplots(figsize=(Fig_Size, Fig_Size))
    palette = sns.diverging_palette(5, 250, as_cmap=True)
    sns.heatmap(Correlation_Matrix, ax=ax, mask=mask, annot=True, square=True,
                cmap=palette, linewidths=0.2, vmin=0, vmax=1, cbar_kws={"shrink": .5})
    # NOTE(review): widening the y-limits by half a cell looks like the usual
    # workaround for heatmap rows being cut off in some Matplotlib versions;
    # confirm it is still wanted.
    bottom, top = ax.get_ylim()
    _ = ax.set_ylim(bottom + 0.5, top - 0.5)


Correlation_Plot(Temp, 10)
| Symbols | T | SIRI | FB | CMCSA | VZ | DIS | NFLX | ATVI |
|---|---|---|---|---|---|---|---|---|
| Symbols | ||||||||
| T | 1.000000 | 0.435569 | 0.244384 | 0.550447 | 0.660776 | 0.484567 | 0.144480 | 0.160600 |
| SIRI | 0.435569 | 1.000000 | 0.414805 | 0.489923 | 0.284895 | 0.428977 | 0.299586 | 0.293716 |
| FB | 0.244384 | 0.414805 | 1.000000 | 0.354370 | 0.201552 | 0.349036 | 0.469529 | 0.446290 |
| CMCSA | 0.550447 | 0.489923 | 0.354370 | 1.000000 | 0.449773 | 0.589540 | 0.276134 | 0.321856 |
| VZ | 0.660776 | 0.284895 | 0.201552 | 0.449773 | 1.000000 | 0.343187 | 0.118805 | 0.184146 |
| DIS | 0.484567 | 0.428977 | 0.349036 | 0.589540 | 0.343187 | 1.000000 | 0.227028 | 0.211305 |
| NFLX | 0.144480 | 0.299586 | 0.469529 | 0.276134 | 0.118805 | 0.227028 | 1.000000 | 0.454578 |
| ATVI | 0.160600 | 0.293716 | 0.446290 | 0.321856 | 0.184146 | 0.211305 | 0.454578 | 1.000000 |
Here, darker shades of blue represent a higher correlation.
# Expected return (mean daily return) against risk (standard deviation) per symbol.
Temp = All_returns.mean().to_frame('Expected Return').join(All_returns.std().to_frame('Risk')).reset_index(drop=False)
Temp['Symbols'] = Temp['Symbols'].replace(Symbols_Dic)
fig = px.scatter(Temp, x='Expected Return', y='Risk', color='Symbols', hover_data=Temp.columns)
fig.update_traces(marker_line_color='Black', marker_line_width=1, opacity=.5, showlegend=True)
# Axis styling with fixed ranges.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 zeroline=False, zerolinewidth=1, zerolinecolor='Black', range=[-0.001, 0.002],
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
# The y axis plots the 'Risk' column, so it is titled 'Risk' (it was
# overridden with the unrelated label 'Value').
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True, range=[0, 0.03],
                 zeroline=False, zerolinewidth=1, zerolinecolor='Black', title_text='Risk',
                 showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_layout(legend_orientation='v', plot_bgcolor='white', height=500)
fig.show()
print("""The current trend seems to output a value between %.2e and %.2e.
We would like to identify a stock with high return and low risk!
""" % (Temp['Expected Return'].min(), Temp['Expected Return'].max()))
The current trend seems to output a value between -2.47e-04 and 1.55e-03. We would like to identify a stock with high return and low risk!
We can find the 0.05 empirical quantile of the daily returns of GOOG.
# 5% empirical quantile of GOOG's daily returns, as a fraction and as a
# positive percentage.
qt = All_returns['GOOG'].quantile(0.05)
qt_pct = abs(qt) * 100
print('Quantile Percentage: %0.4f' % qt_pct)
# The first placeholder shows the quantile in percent and the second the loss
# bound; the original formatted the raw fraction into both placeholders.
print("""The 0.05 empirical quantile of daily returns is at {0:.2f}%.
This means that with 95% confidence, the worst daily loss will not exceed {1:.2f}% (of the investment)."""
      .format(qt * 100, qt_pct))
Quantile Percentage: 2.6156
The 0.05 empirical quantile of daily returns is at -0.03%.
This means that with 95% confidence, the worst daily loss will not exceed -0.03% (of the investment).
To predict future behaviors, we can implement the Monte Carlo method (also see this [2] and this [3]).
# Simulation horizon: one year of daily steps.
days = 365
# Delta t: the length of one step as a fraction of the horizon.
dt = 1 / 365
Defining a Monte Carlo function for the stock price.
def stock_monte_carlo(start_price, days, mu, sigma, delta_t=None):
    """Simulate one random price path.

    Each step applies
    ``new price = old price + old price * (drift + shock)`` with
    ``drift = mu * step`` and ``shock ~ Normal(0, sigma * sqrt(step))``.

    Parameters
    ----------
    start_price : float
        Price stored at index 0 of the path.
    days : int
        Number of points in the returned path (must be at least 1).
    mu : float
        Mean return used for the drift term.
    sigma : float
        Standard deviation of returns used to scale the random shock.
    delta_t : float, optional
        Step size. When omitted, the module-level ``dt`` is used.

    Returns
    -------
    numpy.ndarray
        Array of length ``days`` holding the simulated prices.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.

    Notes
    -----
    The shock is drawn with zero mean. Drawing it with mean ``mu * step`` and
    then adding the drift as well would count the drift twice.
    NOTE(review): the callers in this file pass the mean/std of *daily*
    returns together with a step of 1/365 - confirm the intended units.
    """
    if days < 1:
        raise ValueError("days must be at least 1")
    step = dt if delta_t is None else delta_t
    price = np.zeros(days)
    price[0] = start_price
    # Drift and shock scale are the same for every step, so compute them once.
    drift = mu * step
    shock_scale = sigma * np.sqrt(step)
    for x in range(1, days):
        shock = np.random.normal(loc=0.0, scale=shock_scale)
        price[x] = price[x - 1] + price[x - 1] * (drift + shock)
    return price
def Monte_Carlo_Analysis(Inp, mu, sigma, N=1e2, days = days, Data = Data):
    """Plot ``N`` simulated price paths for one symbol.

    Parameters
    ----------
    Inp : str
        Symbol to select from ``Data``; also looked up in ``Symbols_Dic`` for the title.
    mu, sigma : float
        Mean and standard deviation passed on to ``stock_monte_carlo``.
    N : int or float
        Number of paths to draw (converted with ``int``). The original loop
        ignored this argument and always drew 100 paths, which is the default.
    days : int
        Length of every simulated path.
    Data : pandas.DataFrame
        Long-form price table with 'Symbol' and 'Open' columns.

    Returns
    -------
    pandas.DataFrame
        The rows of ``Data`` for ``Inp`` without the 'Symbol' column.
    """
    # Rows of the requested symbol only.
    df = Data[Data.Symbol == Inp].drop(columns=['Symbol'])
    # Last entry of the 'Open' column is the starting price; .iloc makes the
    # positional lookup explicit on the date-indexed column.
    start_price = df['Open'].iloc[-1]
    N = int(N)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
    for _ in range(N):
        ax.plot(stock_monte_carlo(start_price, days, mu, sigma))
    ax.set_xlabel('Days')
    ax.set_ylabel('Price')
    ax.set_title('Monte Carlo Analysis for %s' % Symbols_Dic[Inp], weight='bold', fontsize=16)
    ax.set_xlim([0, days])
    return df
def Final_price_distribution_simulations(Inp, mu, sigma, N= 1e4, days = days, Data = Data):
    """Run ``N`` simulations for one symbol and collect each path's final price.

    Parameters
    ----------
    Inp : str
        Symbol to select from ``Data``.
    mu, sigma : float
        Mean and standard deviation passed on to ``stock_monte_carlo``.
    N : int or float
        Number of simulations (converted with ``int``).
    days : int
        Length of every simulated path.
    Data : pandas.DataFrame
        Long-form price table with 'Symbol' and 'Open' columns.

    Returns
    -------
    numpy.ndarray
        Array of length ``N`` with the last price of each simulated path.
    """
    # Rows of the requested symbol only.
    df = Data[Data.Symbol == Inp].drop(columns=['Symbol'])
    # Last entry of the 'Open' column is the starting price; .iloc makes the
    # positional lookup explicit on the date-indexed column.
    start_price = df['Open'].iloc[-1]
    N = int(N)
    simulations = np.zeros(N)
    Progress_Bar = progressbar.ProgressBar(maxval=N, widgets=[progressbar.Bar('#', '|', '|'), progressbar.Percentage()])
    Progress_Bar.start()
    for i in range(N):
        # Only the final point of each path is kept.
        simulations[i] = stock_monte_carlo(start_price, days, mu, sigma)[-1]
        Progress_Bar.update(i + 1)
    Progress_Bar.finish()
    return simulations
def Final_price_distribution_plot(simulations, Inp, Data = Data):
    """Plot a histogram of simulated final prices with summary annotations.

    Parameters
    ----------
    simulations : numpy.ndarray
        Final prices, e.g. from ``Final_price_distribution_simulations``.
    Inp : str
        Symbol to select from ``Data``; also looked up in ``Symbols_Dic`` for the title.
    Data : pandas.DataFrame
        Long-form price table with 'Symbol' and 'Open' columns.

    The figure marks the 1st percentile of ``simulations`` with a vertical
    line and prints the start price, the mean final price and the difference
    between the start price and that percentile. The title uses the
    module-level ``days``.
    """
    # Rows of the requested symbol only.
    df = Data[Data.Symbol == Inp].drop(columns=['Symbol'])
    # Last entry of the 'Open' column is the starting price; .iloc makes the
    # positional lookup explicit on the date-indexed column.
    start_price = df['Open'].iloc[-1]
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
    # 1st percentile of the simulated final prices.
    q = np.percentile(simulations, 1)
    ax.hist(simulations, bins='auto', color='#34495e')
    plt.figtext(0.75, 0.80, "Start price: $%.2f" % start_price, fontsize=12)
    plt.figtext(0.75, 0.75, "Mean final price: $%.2f" % simulations.mean(), fontsize=12)
    plt.figtext(0.75, 0.70, "VaR(0.99): $%.2f" % (start_price - q,), fontsize=12)
    plt.figtext(0.15, 0.665, "q(0.99): $%.2f" % q, fontsize=12)
    # Kept from the original: called without arguments, before the marker line is added.
    ax.set_xlim()
    ax.axvline(x=q, linewidth=4, color='#e74c3c')
    ax.set_title("Final price distribution for %s after %s days" % (Symbols_Dic[Inp], days),
                 weight='bold', fontsize=16)
# Symbol to simulate.
Symbol = 'FB'
# Mean and standard deviation of that symbol's daily returns.
mu = All_returns.mean()[Symbol]
sigma = All_returns.std()[Symbol]
# Draw the simulated price paths; the returned frame is not needed here.
_ = Monte_Carlo_Analysis(Symbol, mu, sigma)
The frequencies of the simulated outcomes form a bell curve. The most likely return is in the middle of the curve, which means there is an equal chance that the actual return will be higher or lower than that value.
# Simulate the final prices for the chosen symbol, then plot their distribution.
Simulations = Final_price_distribution_simulations(Symbol, mu, sigma)
Final_price_distribution_plot(Simulations, Symbol)
|#########################################################################|100%
See more details about Value at Risk (VaR) [4].